
# =============================================================
# File: App_B_count.R
# Purpose: Produces results from Appendix Section B: Count Models
# Paper: Foreignness as an Asset: European Carbon Regulation and the Relocation Threat among Multinational Firms
# Author: Patrick Bayer, patrick.bayer@strath.ac.uk
# Date: 5 October 2022
#
# Data: ./data.csv
#
# Technical disclaimer:
# All analyses in R version 4.1.2 (2021-11-01)
# RStudio 2022.07.1 Build 554 ("Spotted Wakerobin" Release (7872775e, 2022-07-22) for Windows)
# Windows 10 Enterprise, 64-bit
# 12th Gen Intel(R) Core(TM) i7-1270P 2.20 GHz with 32GB RAM
# =============================================================

library(lmtest)
library(sandwich)
library(MASS)
library(MatchIt)
library(cem)
library(cobalt)
library(cowplot)
library(ggpubr)
library(marginaleffects)
library(margins)
library(AER)
library(tidyverse)

# Load data
df <- read_csv(file = "./data.csv")


# Trim sample down to true within firm sample
# (1) Limit sample to European MNCs: no allocation data for domestically-owned operations for non-EU MNCs
# (2) Exclude MNCs with only foreign-owned operations as there is no within MNC variation

df$sample <- ifelse(df$mnc.eu==1 & df$foreign.share<1,1,0)
df <- df[df$sample==1,]
df$foreign <- as.factor(df$foreign) # set variable as categorical

# =============================================================
# APPENDIX B: Count models
# =============================================================

# The appendix provides results from several count models
# (1) Poisson models
# (2) Negative binomial models

# =============================================================
# (1) Poisson models
# =============================================================

# Poisson models with DV=0 excluded
# These models use the same samples than in the main text
# Dropping observations that get allocated zero permit is appropriate given the EU ETS rules
# See SI materials for a further discussion

# Check mean and variance of DV
var(df$alloc1[df$alloc1>0], na.rm=TRUE)
mean(df$alloc1[df$alloc1>0], na.rm=TRUE)


# Model (1)
pois1 <- glm(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=df[df$alloc1>0,], family="poisson")

# Observations
nobs(pois1)
length(pois1$xlevels$`as.factor(iso2)`)
length(pois1$xlevels$`as.factor(BVD)`)

# Marginal effects
summary(marginaleffects(pois1, variable="foreign", vcov=sandwich::vcovHC(pois1, type="HC0")))

# Dispersion tests for linear and quadratic variance functions 
dispersiontest(pois1, trafo=1) # NB1 in Cameron and Trivedi 2005 (reported in Table B1)
dispersiontest(pois1, trafo=2) # NB2 in Cameron and Trivedi 2005


# Model (2)
pois2 <- glm(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df[df$alloc1>0,], family="poisson")

# Observations
nobs(pois2)
length(pois2$xlevels$`as.factor(iso2)`)
length(pois2$xlevels$`as.factor(ETSsector)`)
length(pois2$xlevels$`as.factor(BVD)`)

# Marginal effects
summary(marginaleffects(pois2, variable="foreign", vcov=sandwich::vcovHC(pois2, type="HC0")))

# Dispersion tests for linear and quadratic variance functions 
dispersiontest(pois2, trafo=1) # NB1 in Cameron and Trivedi 2005 (reported in Table B1)
dispersiontest(pois2, trafo=2) # NB2 in Cameron and Trivedi 2005


# Model (3)

# Exact matching on 4-digit NACE code and firm
df$NACE <- ifelse(df$NACE=="",NA,df$NACE) # Recode empty strings as NA 

m <- matchit(foreign ~ NACE + BVD, data=df[is.na(df$NACE)==FALSE,], method="exact")
df.match <- match.data(m)

# Re-run main model on matched sample
pois3 <- glm(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df.match[df.match$alloc1>0,], weights=weights, family="poisson")

# Observations
nobs(pois3)
length(pois3$xlevels$`as.factor(iso2)`)
length(pois3$xlevels$`as.factor(ETSsector)`)
length(pois3$xlevels$`as.factor(BVD)`)

# Marginal effects
summary(marginaleffects(pois3, variable="foreign", vcov=sandwich::vcovHC(pois3, type="HC0")))

# Assess relative size of marginal effect
me.pois3 <- summary(marginaleffects(pois3, variable="foreign", vcov=sandwich::vcovHC(pois3, type="HC0")))$estimate
temp <- find_data(pois3)
me.pois3/mean(temp$alloc1)

# Dispersion tests for linear and quadratic variance functions 
dispersiontest(pois3, trafo=1) # NB1 in Cameron and Trivedi 2005 (reported in Table B1)
dispersiontest(pois3, trafo=2) # NB2 in Cameron and Trivedi 2005


# Poisson models with DV=0 included
# These models use somewhat larger samples than in the main text and are to show robustness
# Dropping observations that get allocated zero permit is appropriate given the EU ETS rules
# See SI materials for a further discussion

# Model (1a)
pois1a <- glm(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=df, family="poisson")

# Observations
nobs(pois1a)
length(pois1a$xlevels$`as.factor(iso2)`)
length(pois1a$xlevels$`as.factor(BVD)`)

# Marginal effects
summary(marginaleffects(pois1a, variable="foreign", vcov=sandwich::vcovHC(pois1a, type="HC0")))

# Dispersion tests for linear and quadratic variance functions 
dispersiontest(pois1a, trafo=1) # NB1 in Cameron and Trivedi 2005 (reported in Table B1)
dispersiontest(pois1a, trafo=2) # NB2 in Cameron and Trivedi 2005


# Model (2a)
pois2a <- glm(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df, family="poisson")

# Observations
nobs(pois2a)
length(pois2a$xlevels$`as.factor(iso2)`)
length(pois2a$xlevels$`as.factor(ETSsector)`)
length(pois2a$xlevels$`as.factor(BVD)`)

# Marginal effects
summary(marginaleffects(pois2a, variable="foreign", vcov=sandwich::vcovHC(pois2a, type="HC0")))

# Dispersion tests for linear and quadratic variance functions 
dispersiontest(pois2a, trafo=1) # NB1 in Cameron and Trivedi 2005 (reported in Table B1)
dispersiontest(pois2a, trafo=2) # NB2 in Cameron and Trivedi 2005


# Model (3a)

# Exact matching on 4-digit NACE code and firm
df$NACE <- ifelse(df$NACE=="",NA,df$NACE) # Recode empty strings as NA 

m <- matchit(foreign ~ NACE + BVD, data=df[is.na(df$NACE)==FALSE,], method="exact")
df.match <- match.data(m)

# Re-run main model on matched sample
pois3a <- glm(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df.match, weights=weights, family="poisson")

# Observations
nobs(pois3a)
length(pois3a$xlevels$`as.factor(iso2)`)
length(pois3a$xlevels$`as.factor(ETSsector)`)
length(pois3a$xlevels$`as.factor(BVD)`)

# Marginal effects
summary(marginaleffects(pois3a, variable="foreign", vcov=sandwich::vcovHC(pois3, type="HC0")))

# Dispersion tests for linear and quadratic variance functions 
dispersiontest(pois3a, trafo=1) # NB1 in Cameron and Trivedi 2005 (reported in Table B1)
dispersiontest(pois3a, trafo=2) # NB2 in Cameron and Trivedi 2005

# # Running zero truncated Poisson models from the VGAM package runs into convergence issues
# # This code is provided for completeness only
# # When running the code without start values for coefficients, initialization fails
# # When providing start values, the algorithm converges at a half-step on these values again
# library(VGAM)
# pois1b <- vglm(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=df.merge[df.merge$alloc1>0,], family=pospoisson(), trace=TRUE,
#                coefstart=coef(pois1))
# pois2b <- vglm(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df.merge[df.merge$alloc1>0,], family=pospoisson(), trace=TRUE,
#                coefstart=coef(pois2))


# =============================================================
# (2) Negative binomial models
# =============================================================

# Negative binomial models with DV=0 excluded
# These models use the same samples than in the main text
# Dropping observations that get allocated zero permit is appropriate given the EU ETS rules
# See SI materials for a further discussion

library(glm2) # using "glm.fit2" speeds up convergence

# Model (1)
nb1 <- glm.nb(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=df[df$alloc1>0,], 
              method="glm.fit2", control=list(epsilon = 1e-8, maxit=1000, trace=TRUE))

# Observations
nobs(nb1)
length(nb1$xlevels$`as.factor(iso2)`)
length(nb1$xlevels$`as.factor(BVD)`)

# Marginal effects
summary(marginaleffects(nb1, variable="foreign", vcov=sandwich::vcovHC(nb1, type="HC0")))

# Assess relative size of marginal effect
me.nb1 <- summary(marginaleffects(nb1, variable="foreign", vcov=sandwich::vcovHC(nb1, type="HC0")))$estimate
temp <- find_data(nb1)
me.nb1/mean(temp$alloc1)



# Model (2)
nb2 <- glm.nb(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df[df$alloc1>0,], 
              method="glm.fit2", control=list(epsilon = 1e-8, maxit=1000, trace=TRUE))

# Observations
nobs(nb2)
length(nb2$xlevels$`as.factor(iso2)`)
length(nb2$xlevels$`as.factor(ETSsector)`)
length(nb2$xlevels$`as.factor(BVD)`)

# Marginal effects
summary(marginaleffects(nb2, variable="foreign", vcov=sandwich::vcovHC(nb2, type="HC0")))

# Assess relative size of marginal effect
me.nb2 <- summary(marginaleffects(nb2, variable="foreign", vcov=sandwich::vcovHC(nb2, type="HC0")))$estimate
temp <- find_data(nb2)
me.nb2/mean(temp$alloc1)


# Model (3)

# Exact matching on 4-digit NACE code and firm
df$NACE <- ifelse(df$NACE=="",NA,df$NACE) # Recode empty strings as NA 

m <- matchit(foreign ~ NACE + BVD, data=df[is.na(df$NACE)==FALSE,], method="exact")
df.match <- match.data(m)

# Re-run main model on matched sample
nb3 <- glm.nb(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df.match[df.match$alloc1>0,], 
              method="glm.fit2", control=list(epsilon = 1e-8, maxit=1000, trace=TRUE))


# Observations
nobs(nb3)
length(nb3$xlevels$`as.factor(iso2)`)
length(nb3$xlevels$`as.factor(ETSsector)`)
length(nb3$xlevels$`as.factor(BVD)`)

# Marginal effects
summary(marginaleffects(nb3, variable="foreign", vcov=sandwich::vcovHC(nb3, type="HC0")))

# Assess relative size of marginal effect
me.nb3 <- summary(marginaleffects(nb3, variable="foreign", vcov=sandwich::vcovHC(nb3, type="HC0")))$estimate
temp <- find_data(nb3)
me.nb3/mean(temp$alloc1)


# Negative binomial models with DV=0 included
# These models use somewhat larger samples than in the main text and are to show robustness
# Dropping observations that get allocated zero permit is appropriate given the EU ETS rules
# See SI materials for a further discussion


# Model (1a)
nb1a <- glm.nb(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=df, 
               method="glm.fit2", control=list(epsilon = 1e-8, maxit=1000, trace=TRUE))

# Observations
nobs(nb1a)
length(nb1a$xlevels$`as.factor(iso2)`)
length(nb1a$xlevels$`as.factor(BVD)`)

# Marginal effects
summary(marginaleffects(nb1a, variable="foreign", vcov=sandwich::vcovHC(nb1a, type="HC0")))

# Assess relative size of marginal effect
me.nb1a <- summary(marginaleffects(nb1a, variable="foreign", vcov=sandwich::vcovHC(nb1a, type="HC0")))$estimate
temp <- find_data(nb1a)
me.nb1a/mean(temp$alloc1)



# Model (2a)
nb2a <- glm.nb(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df, 
               method="glm.fit2", control=list(epsilon = 1e-8, maxit=1000, trace=TRUE))

# Observations
nobs(nb2a)
length(nb2a$xlevels$`as.factor(iso2)`)
length(nb2a$xlevels$`as.factor(ETSsector)`)
length(nb2a$xlevels$`as.factor(BVD)`)

# Marginal effects
summary(marginaleffects(nb2a, variable="foreign", vcov=sandwich::vcovHC(nb2a, type="HC0")))

# Assess relative size of marginal effect
me.nb2a <- summary(marginaleffects(nb2a, variable="foreign", vcov=sandwich::vcovHC(nb2a, type="HC0")))$estimate
temp <- find_data(nb2a)
me.nb2a/mean(temp$alloc1)


# Model (3a)

# Exact matching on 4-digit NACE code and firm
df$NACE <- ifelse(df$NACE=="",NA,df$NACE) # Recode empty strings as NA 

m <- matchit(foreign ~ NACE + BVD, data=df[is.na(df$NACE)==FALSE,], method="exact")
df.match <- match.data(m)

# Re-run main model on matched sample
nb3a <- glm.nb(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df.match, 
               method="glm.fit2", control=list(epsilon = 1e-8, maxit=1000, trace=TRUE))


# Observations
nobs(nb3a)
length(nb3a$xlevels$`as.factor(iso2)`)
length(nb3a$xlevels$`as.factor(ETSsector)`)
length(nb3a$xlevels$`as.factor(BVD)`)

# Marginal effects
summary(marginaleffects(nb3a, variable="foreign", vcov=sandwich::vcovHC(nb3a, type="HC0")))

# Assess relative size of marginal effect
me.nb3a <- summary(marginaleffects(nb3a, variable="foreign", vcov=sandwich::vcovHC(nb3a, type="HC0")))$estimate
temp <- find_data(nb3a)
me.nb3a/mean(temp$alloc1)


# # Running zero truncated negative binomial models from the VGAM package produces almost identical point estimates
# # This code is provided for completeness only
# # Estimation may be slow
# library(VGAM)
# 
# # Model (1b)
# nb1b <- vglm(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(BVD), data=df[df$alloc1>0,],
#              family=posnegbinomial(), trace=TRUE)
# 
# # Compare point estimates from two models
# summary(nb1)$coef[["foreign1","Estimate"]] # normal NB model
# nb1b@coefficients["foreign1"] # zero truncated NB model
# 
# # Assess difference in estimated effects
# diff1 <- abs(nb1$coefficients[2:length(nb1$coefficients)])-abs(nb1b@coefficients[3:length(nb1b@coefficients)])
# table(round(diff1,3)) # essentially identical up to three digits
# 
# # Model (2b)
# nb2b <- vglm(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df[df$alloc1>0,],
#              family=posnegbinomial(), trace=TRUE)
# 
# # Compare point estimates from two models
# summary(nb2)$coef[["foreign1","Estimate"]] # normal NB model
# nb2b@coefficients["foreign1"] # zero truncated NB model
# 
# # Assess difference in estimated effects
# diff2 <- abs(nb2$coefficients[2:length(nb2$coefficients)])-abs(nb2b@coefficients[3:length(nb2b@coefficients)])
# table(round(diff2,3)) # essentially identical up to three digits
# 
# 
# # Model (3b)
# 
# # Exact matching on 4-digit NACE code and firm
# df$NACE <- ifelse(df$NACE=="",NA,df$NACE) # Recode empty strings as NA 
# 
# m <- matchit(foreign ~ NACE + BVD, data=df[is.na(df$NACE)==FALSE,], method="exact")
# df.match <- match.data(m)
# 
# nb3b <- vglm(alloc1~foreign+logAlloc0+logEmit0+as.factor(iso2)+as.factor(ETSsector)+as.factor(BVD), data=df.match[df.match$alloc1>0,],
#              family=posnegbinomial(), trace=TRUE)
# 
# 
# # Compare point estimates from two models
# summary(nb3)$coef[["foreign1","Estimate"]] # normal NB model
# nb3b@coefficients["foreign1"] # zero truncated NB model
# 
# # Assess difference in estimated effects
# diff3 <- abs(nb3$coefficients[2:length(nb3$coefficients)])-abs(nb3b@coefficients[3:length(nb3b@coefficients)])
# table(round(diff3,3)) # essentially identical up to three digits



# =============================================================
#                       END OF CODE
# =============================================================